import requests
import re
import url
import random
import time

# Browser-mimicking request headers for e4ftl01.cr.usgs.gov (USGS LP DAAC).
# NOTE(review): the Cookie value is a captured session token pasted from a
# browser — it will expire; confirm it is still valid before relying on it.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # 'br' removed: requests/urllib3 cannot decode Brotli unless the optional
    # 'brotli' package is installed, so a server answering with br-encoded
    # bodies would leave response.content compressed and corrupt the saved
    # .hdf files. gzip/deflate are decoded transparently by requests.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Cookie': '_ga=GA1.2.1675229666.1610187138; _gid=GA1.2.1180088115.1610187138; DATA=X-qx6mIrphkO@WIPO@NifAAAAXk',
    'Host': 'e4ftl01.cr.usgs.gov',
    'sec-ch-ua': '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
    'sec-ch-ua-mobile': '?0',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
}

# Pool of forward proxies; download() picks one at random per request.
# Each entry maps BOTH schemes: requests only applies a proxy whose key
# matches the URL scheme, and the archive URLs are https://, so the
# original http-only mapping silently bypassed the proxy on every real
# request. An HTTP proxy tunnels HTTPS traffic via CONNECT, so the same
# proxy address is valid under the "https" key.
proxies = [
    {
        "http": "http://190.2.214.90:999",
        "https": "http://190.2.214.90:999",
    },
    {
        "http": "http://20.186.110.157:3128",
        "https": "http://20.186.110.157:3128",
    },
]


def download(url, out_dir="C:\\Users\\91481\\Desktop\\hdf_data", timeout=60):
    """Download one archive file and save it locally.

    Extracts the last two path components (date directory and file name)
    from *url* and writes the response body to ``out_dir`` as
    ``<date>-<filename>``, e.g. ``2020.01.01-MOD11A2...hdf``.

    NOTE(review): the parameter shadows the module-level ``import url``;
    the name is kept so existing keyword callers stay valid.

    :param url: full https URL matching the archive layout
                ``https://host//collection/product/version/date/file``.
    :param out_dir: destination directory (no trailing separator);
                    defaults to the original hard-coded path.
    :param timeout: seconds before giving up on a stalled proxy/server.
    :raises ValueError: if *url* does not match the expected layout
                        (previously an opaque AttributeError on None).
    :raises requests.HTTPError: if the server answers with an error
                                status, instead of silently saving the
                                error page as a .hdf file.
    """
    file_rule = re.match(r"https://.*//.*/.*/.*/(.*)/(.*)", url)
    if file_rule is None:
        raise ValueError("URL does not match expected archive layout: " + url)
    date_dir, base_name = file_rule.groups()
    file_name = out_dir + "\\" + date_dir + '-' + base_name

    # timeout keeps a dead proxy from hanging the whole crawl forever
    response = requests.get(url, headers=headers,
                            proxies=random.choice(proxies), timeout=timeout)
    print(response.status_code)
    print(len(response.content))
    # Fail loudly rather than writing an HTML error page into a .hdf file.
    response.raise_for_status()

    with open(file_name, 'wb') as f:
        f.write(response.content)


if __name__ == '__main__':
    # Dry run: print every candidate URL with a long pause between lines.
    # The actual fetch is deliberately disabled; re-enable the download()
    # call below to start saving files.
    for target in url.filter():
        print(target)
        time.sleep(20)
        # download(target)
